import ast
import json

import pandas as pd
import requests
from numpy import nan


class Wikifier(object):
    """Fill in missing abstracts of an NER table via a remote wikifier service.

    On construction the abstract lookup table (``./data/abstract.csv``) and
    the NER spreadsheet (``./data/NER_US_ORG_PERSON.xlsx``) are loaded.
    ``run`` then walks the NER rows, asks the service at ``self.url`` for a
    label for each row that has no abstract yet, and copies the matching
    abstract from the lookup table when one exists.
    """

    def __init__(self):
        self.headers = {'User-Agent': 'Mozilla/5.0'}
        self.url = 'http://macniece.seas.upenn.edu:4005/wikify'
        # label -> {column: value} mapping built from abstract.csv
        self.extract_dict = dict()
        # working table; replaced by init_table() (and by run() when resuming)
        self.ner = pd.DataFrame()
        self.init_table()

    def init_table(self):
        """Load the abstract lookup table and the NER table from ``./data``.

        Populates ``self.extract_dict`` (rows of abstract.csv keyed by the
        ``word`` column) and ``self.ner`` (the spreadsheet plus a ``tag``
        column and an empty ``wiki_abstract_key`` column).
        """
        print("loading abstract...")
        extract_table = pd.read_csv("./data/abstract.csv")
        extract_table.set_index("word", inplace=True)
        self.extract_dict = extract_table.to_dict('index')
        print("finished")

        print("loading ner...")
        ner = pd.read_excel("./data/NER_US_ORG_PERSON.xlsx")
        # The stale index column only exists if the sheet was saved with one.
        ner.drop("Unnamed: 0", axis=1, inplace=True, errors="ignore")
        # pd.isna() instead of `is nan`: an identity test only matches the
        # numpy singleton and misses other NaN objects and None.
        ner["tag"] = ner["abstract"].apply(
            lambda x: "sentence" if pd.isna(x) else "dbpedia"
        )
        # Object dtype so string labels can be written later without an
        # implicit float -> object upcast.
        ner["wiki_abstract_key"] = pd.Series(nan, index=ner.index, dtype=object)
        self.ner = ner
        print("finished")

    def call_wiki(self, text):
        """POST ``text`` to the wikifier endpoint and return the raw reply body.

        Raises whatever ``requests.post`` raises (e.g. on timeout after 10 s).
        """
        data = {'json': 'true', 'text': str(text)}
        response = requests.post(self.url, headers=self.headers, data=data, timeout=10)
        return response.text

    @staticmethod
    def _parse_response(raw):
        """Decode a service reply into Python objects without executing it."""
        # The request sets json='true', so the reply is presumably JSON;
        # literal_eval keeps replies formatted as Python literals working.
        # Neither path can run code embedded in the reply, unlike eval().
        try:
            return json.loads(raw)
        except ValueError:
            return ast.literal_eval(raw)

    @staticmethod
    def _match_label(res, text):
        """Return the label of the last term whose string equals ``text``.

        The comparison is case-insensitive. Returns ``"unknown"`` when
        nothing matches or when the inputs do not have the expected shape.
        """
        label = "unknown"
        # A missing TEXT cell arrives as float NaN; nothing can match it.
        if not isinstance(res, dict) or not isinstance(text, str):
            return label

        wanted = text.lower()
        for term in res.get('wikifier') or []:
            if not isinstance(term, dict):
                continue
            surface = term.get('string')
            if isinstance(surface, str) and surface.lower() == wanted and 'label' in term:
                # No break: a later match overrides an earlier one.
                label = term['label']
        return label

    def extract(self, text, sen):
        """Wikify sentence ``sen`` and return the label found for ``text``.

        Returns ``"unknown"`` if the service call or decoding fails, or if
        no returned term matches ``text``. Failures are printed, not raised,
        so one bad row does not abort a batch run.
        """
        try:
            res = self._parse_response(self.call_wiki(sen))
        except Exception as e:
            print("Call wiki failed")
            print("Reason", e)
            return "unknown"
        return self._match_label(res, text)

    def run(self, flag=0):
        """Label every row tagged ``sentence`` and write the result files.

        Args:
            flag: When non-zero, the table is reloaded from the checkpoint
                ``./cash/limited_wikifier.csv`` and rows whose index is
                below ``flag`` are skipped (resume point).

        A checkpoint is written whenever the row index is a multiple of 30;
        the final table goes to ``./output/limited_wikifier.{xlsx,csv}``.
        """
        if flag:
            print("load cash...")
            self.ner = pd.read_csv("./cash/limited_wikifier.csv")
            self.ner.drop("Unnamed: 0", axis=1, inplace=True, errors="ignore")

        # Columns that receive strings below may have been inferred as
        # numeric (e.g. all-NaN after a CSV round trip); cast them up front
        # rather than relying on implicit upcasting during assignment.
        for column in ('wiki_abstract_key', 'abstract', 'tag'):
            if column in self.ner.columns and pd.api.types.is_numeric_dtype(self.ner[column]):
                self.ner[column] = self.ner[column].astype(object)

        for i, dp in self.ner.iterrows():
            if int(i) < flag:
                continue
            if int(i) % 30 == 0:
                print("saving...")
                self.ner.to_csv("./cash/limited_wikifier.csv")

            text = dp["TEXT"]
            tag = dp["tag"]
            sentence = dp["Sentence"]

            if tag == 'dbpedia':
                print("skip dbpedia")
                continue

            key_label = self.extract(text, sentence)
            self.ner.loc[i, 'wiki_abstract_key'] = key_label
            entry = self.extract_dict.get(key_label)

            if entry is None:
                print("not found abstract")
                continue

            self.ner.loc[i, 'abstract'] = entry['abstract']
            self.ner.loc[i, 'tag'] = 'tagger_to_finish'
            print(i, text, key_label)
        print("finished")
        print("saving...")
        self.ner.to_excel("./output/limited_wikifier.xlsx")
        self.ner.to_csv("./output/limited_wikifier.csv")

if __name__ == "__main__":
    # Non-zero flag: run() reloads the checkpoint CSV and skips every row
    # whose index is below this value.
    RESUME_FROM_ROW = 7440
    wiki = Wikifier()
    wiki.run(flag=RESUME_FROM_ROW)